clear all
set more off
cap log close

***************************************************************************************************
* Program: measure_police_time.do
* Purpose: Create BG level police hours 
* Sections:
*     1. All pings in the station
*     2. Qualified Phones
*     3. Construct Shifts PD interval 
*     4. Collapse to BG level 
* Files Used:
*     1. bgs_in_`city'.dta
*     2. PD_interval_24_matched_bg.dta 
*        (ping-level smartphone data; not provided due to privacy reason)
*     3. Police_Stations.dta 
* Files Created:
*     1. all_bgs_in_21cities.dta
*     2. patrol_time_in_bg.dta
*     3. patrol_time_in_bg_nonwork.dta
*     4. police_hour_by_shift.dta
* NOTES: Need to ssc install 
*        - gtools, outreg2, sutex2, geoinpoly, geonear, reghdfe - if not already installed.
***************************************************************************************************

************************************************************************
** set macros
************************************************************************

******************************************************************** 
** Combine all block groups inside the police district 
********************************************************************
clear 
local cities "Austin Boston Charlotte Chicago Columbus DC Dallas Denver Detroit FortWorth Houston Indianapolis LA Nashville NY OklahomaCity Philadelphia Phoenix SanAntonio SanDiego SanFrancisco"
foreach city of local cities{
append using "${city_bg}/bgs_in_`city'.dta"
}
destring gidbg, replace
keep city gidbg district_ID INTPTLAT INTPTLON
save "${DataCleaned}/all_bgs_in_21cities.dta",replace

******************************************************************** 
** Create Police Shifts 
********************************************************************


******************************************************************** 
** Create different versions of BG police hour data 
********************************************************************

* create a program to filter pings to recycle code 
cap program drop filter_pings
program define filter_pings 
	
	* for each intervals, identify the station that the shift starts and ends with
	gen start_BuildingID_ = Building_ID if utc_timestamp == time_firstPD
	gen end_BuildingID_ = Building_ID if utc_timestamp == time_lastPD

	gegen start_BuildingID = max(start_BuildingID_), by(PD_max_interval)
	gegen end_BuildingID = max(end_BuildingID_), by(PD_max_interval)
	drop  start_BuildingID_ end_BuildingID_

	* only keep trips that starts and ends with the same station 
	gen byte same_start_end = (start_BuildingID==end_BuildingID)
	keep if same_start_end
	drop same_start_end

	* calculate ping speed 
	sort Master_ID_PostFeb17 utc_timestamp PD_max_interval 
	gen double sec_diff = utc_timestamp - utc_timestamp[_n-1] if PD_max_interval == PD_max_interval[_n-1]
	gen double lat_prev_ping = latitude[_n-1] if PD_max_interval == PD_max_interval[_n-1]
	gen double lon_prev_ping = longitude[_n-1] if PD_max_interval == PD_max_interval[_n-1]
	geodist latitude longitude lat_prev_ping lon_prev_ping, generate(miles_since_last_ping) miles
	gen double ping_speed_mph = miles_since_last_ping / (sec_diff / 3600)

	drop city station Building_ID_new 
	ren start_BuildingID Building_ID_new
	merge m:1 Building_ID_new using "${DataRaw}/Police_Stations.dta", keepusing(city station) keep(match) nogenerate

	ren Building_ID_new start_BuildingID
	ren city start_city
	ren station start_station

	* keep shifts with duration >= 4 hr
	keep if hour_diff_1stlastPD >= 4

	* drop fast pings (speed > 50 mph)
	keep if mode == 0 & ping_speed_mph <= 50 
	
	* drop pings where start_city != the city that the block group belongs to 
	merge m:1 gidbg using "${DataCleaned}/all_bgs_in_21cities.dta", keepusing(city gidbg district_ID) 

	replace city = upper(city)
	replace start_city = subinstr(start_city, " ","",.)
	replace city = "LOSANGELES" if city=="LA"
	replace city = "NEWYORKCITY" if city == "NY"
	replace city = "WASHINGTON" if city == "DC"

	drop if city!=start_city & _merge==3
	drop if _merge == 2 // * fill in the zeros later
	drop _merge

end

***************************************************************
* create BG-level police hour 
* using all pings less than 50 mphs and shift >= 4 hours
***************************************************************

use "${DataRaw}/PD_interval_24_matched_bg.dta", clear

* run program to filter ping 
filter_pings 

* calculate total officer-hours in bg
gegen hour_in_bg = total(ping_duration/3600), by(gidbg start_city) 

* calculate number of officers 
gunique Master_ID_PostFeb17, by(gidbg start_city) generate(N_officers_in_bg)

* calculate number of shifts 
gunique PD_max_interval, by(gidbg start_city) generate(N_shifts_in_bg)

* keep unique city-bg observation * 
egen tag_bg_city = tag(gidbg start_city) 
keep if tag_bg_city
drop tag_bg_city

keep gidbg start_city hour_in_bg N_officers_in_bg N_shifts_in_bg

* merge with all block groups in the city to fill in zero police hours 
merge m:1 gidbg using "${DataCleaned}/all_bgs_in_21cities.dta", ///
	keepusing(city gidbg district_ID)

* drop all block groups not within the city limits
drop if _merge==1

* fill in the zeros
foreach var of varlist hour_in_bg N_officers_in_bg N_shifts_in_bg{
replace `var' = 0 if missing(`var')
}

replace city = upper(city)
replace start_city = subinstr(start_city, " ","",.)
replace city = "LOSANGELES" if city=="LA"
replace city = "NEWYORKCITY" if city == "NY"
replace city = "WASHINGTON" if city == "DC"

assert city==start_city if _merge==3
drop start_city // start_city is missing if no officers visit it
drop _merge 

compress
save "${DataCleaned}/patrol_time_in_bg.dta",replace

***************************************************************
* create BG-level police hour 
* using pings less than 50 mphs and shift >= 4 hours
* remove workday 9-5 pm pings 
***************************************************************
use "${DataRaw}/PD_interval_24_matched_bg.dta", clear

* run program to filter ping 
filter_pings 

* merge with timezone information * (Note: timezone_coor.dta and timezone_data.dta are NOT in the ${DataRaw} folder)
geoinpoly latitude longitude using "${DataRaw}/timezone_coor.dta"
merge m:1 _ID using "${DataRaw}/timezone_data.dta"

gen UTC_DT_offset = 7 if tzid == "America/Los_Angeles" | tzid == "America/Phoenix"
replace UTC_DT_offset = 4 if tzid == "America/New_York" | tzid == "America/Indiana/Indianapolis" | tzid == "America/Detroit"
replace UTC_DT_offset = 5 if tzid == "America/Chicago" 
replace UTC_DT_offset = 6 if tzid == "America/Denver" 

drop if UTC_DT_offset == . // Honolulu pings? 

gen Hour_UTC = hh((utc_timestamp * 1000) + mdyhms(1,1,1970,0,0,0))

gen Stata_Date_local = StataDate_UTC 
replace Stata_Date_local = Stata_Date_local - 1 if Hour_UTC - UTC_DT_offset < 0 

gen Hour_local = Hour_UTC - UTC_DT_offset 
replace Hour_local = Hour_local + 24 if Hour_UTC - UTC_DT_offset < 0 

**************************************
* adjust for non-daylight saving time
* DST 2017 began at March 12 and ended at 2:00 AM on Sunday, November 5
**************************************
replace Hour_local = Hour_local - 1 if ((Stata_Date_local == date("3/12/2017","MDY") & Hour_local <= 2) | (Stata_Date_local < date("3/12/2017","MDY"))) & tzid != "America/Phoenix"
replace Hour_local = Hour_local - 1 if ((Stata_Date_local == date("11/5/2017","MDY") & Hour_local >= 2) | (Stata_Date_local > date("11/5/2017","MDY") & Stata_Date_local !=. )) & tzid != "America/Phoenix"

replace Stata_Date_local = Stata_Date_local - 1 if Hour_local < 0 
replace Hour_local = Hour_local + 24 if Hour_local < 0 
format Stata_Date_local %td

gen day_of_week = dow(Stata_Date_local)
* keep pings in nonwork hour (Weekday 9-5)
drop if (day_of_week>=1 & day_of_week<=5) & (Hour_local >= 9 & Hour_local <= 17)
	
* calculate total officer-hours in bg
gegen hour_in_bg = total(ping_duration/3600), by(gidbg start_city) 

* calculate number of officers 
gunique Master_ID_PostFeb17, by(gidbg start_city) generate(N_officers_in_bg)

* calculate number of shifts 
gunique PD_max_interval, by(gidbg start_city) generate(N_shifts_in_bg)

* keep unique city-bg observation * 
egen tag_bg_city = tag(gidbg start_city) 
keep if tag_bg_city
drop tag_bg_city

keep gidbg start_city hour_in_bg N_officers_in_bg N_shifts_in_bg

* merge with all block groups in the city to fill in zero police hours 
merge m:1 gidbg using "${DataCleaned}/all_bgs_in_21cities.dta", ///
	keepusing(city gidbg district_ID)

* drop all block groups not within the city limits
drop if _merge==1

* fill in the zeros
foreach var of varlist hour_in_bg N_officers_in_bg N_shifts_in_bg{
replace `var' = 0 if missing(`var')
}

replace city = upper(city)
replace start_city = subinstr(start_city, " ","",.)
replace city = "LOSANGELES" if city=="LA"
replace city = "NEWYORKCITY" if city == "NY"
replace city = "WASHINGTON" if city == "DC"

assert city==start_city if _merge==3
drop start_city // start_city is missing if no officers visit it

compress
save "${DataCleaned}/patrol_time_in_bg_nonwork.dta",replace

***************************************************************
* create BG-level police hour 
* using all pings less than 50 mphs and shift >= 4 hours
***************************************************************

use "${DataRaw}/PD_interval_24_matched_bg.dta", clear

* run program to filter ping 
filter_pings

* calculate Hour since shift starts 
gen hour_since_shift_start = (utc_timestamp - time_firstPD)/3600
replace hour_since_shift_start = floor(hour_since_shift_start) + 1

* calculate police time by shift 
gegen hour_in_bg_hr = total(ping_duration/3600), by(gidbg start_city hour_since_shift_start)

* calculate number of officers / shifts in bg by hour
gunique PD_max_interval, by(gidbg start_city hour_since_shift_start) generate(N_shifts_in_bg_hr)

* keep unique city-bg-shift_hour observation  
egen tag_bg_city_hr = tag(gidbg start_city hour_since_shift_start) 
keep if tag_bg_city_hr
drop tag_bg_city_hr

keep gidbg start_city hour_in_bg_hr N_shifts_in_bg_hr hour_since_shift_start
compress

* reshape from long to wide (create hour_in_)*
reshape wide hour_in_bg_hr N_shifts_in_bg_hr, i(gidbg start_city) j(hour_since_shift_start)

* merge with all block groups in the city
merge m:1 gidbg using "${DataCleaned}/all_bgs_in_21cities.dta", keepusing(city gidbg district_ID)

* drop all block groups not within the city limits
drop if _merge==1

* fill in the zeros
forval j = 1/12{
replace hour_in_bg_hr`j' = 0 if missing(hour_in_bg_hr`j')
replace N_shifts_in_bg_hr`j' = 0 if missing(N_shifts_in_bg_hr`j')

}

* create arinh values
forval j = 1/12{
gen asinh_hour_in_bg_hr`j' = asinh(hour_in_bg_hr`j')
gen asinh_N_shifts_in_bg_hr`j' = asinh(N_shifts_in_bg_hr`j')

}

* uniform city name 
replace city = upper(city)
replace start_city = subinstr(start_city, " ","",.)
replace city = "LOSANGELES" if city=="LA"
replace city = "NEWYORKCITY" if city == "NY"
replace city = "WASHINGTON" if city == "DC"

assert city==start_city if _merge==3
drop _merge

save "${DataCleaned}/police_hour_by_shift.dta", replace
